output_12 <- read.csv(here("data","bivalve_model","LookUpTable_201214.csv"))
output_14 <- read.csv(here("data","bivalve_model","LookUpTable_201416.csv"))
Merging outputs of 2012 and 2014 outputs
combined <- base::merge(output_12, output_14, by = c("site", "long", "lat"), all = TRUE) %>%
mutate(yield_12 = yield.x,
yield_14 = yield.y
) %>%
dplyr::select("site", "long", "lat", "yield_12", "yield_14")
Is there a signififcant difference between the (paired) yield values of the 2012 and 2014 versions of the model?
## converting data frames to be paired vectors##
combined_nocopies <- base::merge(output_12, output_14, by = c("site", "long", "lat"), all = FALSE) %>%
mutate(yield_12 = yield.x,
yield_14 = yield.y
) %>%
dplyr::select("site", "long", "lat", "yield_12", "yield_14")
vec12 <- combined_nocopies %>%
pull(yield_12)
vec14 <- combined_nocopies %>%
pull(yield_14)
## conducting t test ##
t.test(x = vec14, y = vec12, paired = TRUE)
##
## Paired t-test
##
## data: vec14 and vec12
## t = -21.139, df = 4442, p-value < 2.2e-16
## alternative hypothesis: true mean difference is not equal to 0
## 95 percent confidence interval:
## -448882.6 -372689.2
## sample estimates:
## mean difference
## -410785.9
############## OUTPUT ###################
# Paired t-test
#
# data: vec14 and vec12
# t = -21.139, df = 4442, p-value < 2.2e-16
# alternative hypothesis: true mean difference is not equal to 0
# 95 percent confidence interval:
# -448882.6 -372689.2
# sample estimates:
# mean difference
# -410785.9
Yes, these outputs are different.
Calculating the mean and sd values for each year below:
mean(vec12)
## [1] 5985567
sd(vec12)
## [1] 1541905
mean(vec14)
## [1] 5574781
sd(vec14)
## [1] 2260238
combined_long <- combined %>%
pivot_longer(yield_12:yield_14,
names_to = "year",
names_prefix = "yield_",
values_to = "yield")
## yield by latitude
ggplot(data = combined_long, aes(x = lat, y = yield)) +
geom_line(aes(color = year)) +
theme_minimal()
##Yikes this output is messy##
## histogram (shows left skew for both but more dramatic left skew for 2014-16 time frames)
ggplot(data = combined_long, aes(x = yield, fill = year)) +
geom_histogram() +
xlim(c(2300000, 11800000)) +
theme_minimal()
##boxplot and violin plot
ggplot(data = combined_long, aes(x = year, y = yield, fill = year)) +
geom_boxplot() +
geom_jitter(color="black", size=0.4, alpha=0.9) +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
theme_minimal()
ggplot(data = combined_long, aes(x = year, y = yield, fill = year)) +
geom_violin() +
#geom_jitter(color="black", size=0.4, alpha=0.9) +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
theme_minimal()
Bringing in the SST data to look at if this could be a driver for the difference in model yields across years. This is likely due to the blob, but POC could also be a factor.
To avoid having to rerun the code in netCDFfile_testing.Rmd and netCDFfile_newbivalvedata.Rmd to wrangle the SST data, I’ve written those matricies as csvs and will read them in here to work with them.
# write.csv(matrix_sst_2012, here("data", "matrix_sst_2012.csv"))
# write.csv(matrix_sst_2014, here("data", "matrix_sst_2014.csv"))
matrix_sst_2012 <- read.csv(here("data", "matrix_sst_2012.csv"))
matrix_sst_2014 <- read.csv(here("data", "matrix_sst_2014.csv"))
#need to reformat the SST matricies
sst_2012 <- matrix_sst_2012 %>%
pivot_longer(MODIS.20121001:MODIS.20140301,
names_to = "month",
names_prefix = "MODIS.",
values_to = "SST") %>%
group_by(sites_lon, sites_lat) %>%
summarize(sst_ave = mean(SST)) %>%
mutate(lat = sites_lat,
long = sites_lon,
sst_ave12 = sst_ave) %>%
ungroup() %>%
dplyr::select("long", "lat", "sst_ave12")
sst_2014 <- matrix_sst_2014 %>%
pivot_longer(MODIS.20141001:MODIS.20160301,
names_to = "month",
names_prefix = "MODIS.",
values_to = "SST") %>%
group_by(sites_lon, sites_lat) %>%
summarize(sst_ave = mean(SST)) %>%
mutate(lat = sites_lat,
long = sites_lon,
sst_ave14 = sst_ave) %>%
ungroup() %>%
dplyr::select("long", "lat", "sst_ave14")
all_sst <- base::merge(sst_2012, sst_2014, by = c("long", "lat"), all = TRUE) %>%
pivot_longer(sst_ave12:sst_ave14,
names_to = "year",
names_prefix = "sst_ave",
values_to = "sst_ave")
all_sst_yield <- base::merge (all_sst, combined_long, by = c("lat", "long", "year"), all = TRUE) %>%
dplyr::select("site", "long", "lat", "year", "sst_ave", "yield")
ggplot(data = all_sst_yield, aes(x = sst_ave, y = yield)) +
geom_point(aes(color = year)) +
theme_minimal()
## sst only ##
ggplot(data = all_sst_yield, aes(x = year, y = sst_ave, fill = year)) +
geom_violin() +
#geom_jitter(color="black", size=0.4, alpha=0.9) +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
theme_minimal()
#ggplot_build(p)$data
SST versus yield for 2012 and 2014 models
all_12 <- all_sst_yield %>%
filter(year == "12")
ggplot(data = all_12, aes(x = sst_ave, y = yield)) +
geom_point(color = "#F8766D") +
xlim(c(10,21)) +
ylim(c(2500000, 11580000)) +
theme_minimal()
all_14 <- all_sst_yield %>%
filter(year == "14")
ggplot(data = all_14, aes(x = sst_ave, y = yield)) +
geom_point(color = "#00BFC4") +
xlim(c(10,21)) +
ylim(c(2500000, 11580000)) +
theme_minimal()
Similar to the SST work above, bringing in the POC data to look at if this could be a driver for the difference in model yields across years.
To avoid having to rerun the code in netCDFfile_testing.Rmd and netCDFfile_newbivalvedata.Rmd to wrangle the POC data, I’ve written those matricies as csvs and will read them in here to work with them.
# write.csv(matrix_poc_2012, here("data", "matrix_poc_2012.csv"))
# write.csv(matrix_poc_2014, here("data", "matrix_poc_2014.csv"))
matrix_poc_2012 <- read.csv(here("data", "matrix_poc_2012.csv"))
matrix_poc_2014 <- read.csv(here("data", "matrix_poc_2014.csv"))
#need to reformat the POC matricies
poc_2012 <- matrix_poc_2012 %>%
pivot_longer(MODIS.20121001:MODIS.20140301,
names_to = "month",
names_prefix = "MODIS.",
values_to = "POC") %>%
group_by(sites_lon, sites_lat) %>%
summarize(poc_ave = mean(POC)) %>%
mutate(lat = sites_lat,
long = sites_lon,
poc_ave12 = poc_ave) %>%
ungroup() %>%
dplyr::select("long", "lat", "poc_ave12")
poc_2014 <- matrix_poc_2014 %>%
pivot_longer(MODIS.20141001:MODIS.20160301,
names_to = "month",
names_prefix = "MODIS.",
values_to = "POC") %>%
group_by(sites_lon, sites_lat) %>%
summarize(poc_ave = mean(POC)) %>%
mutate(lat = sites_lat,
long = sites_lon,
poc_ave14 = poc_ave) %>%
ungroup() %>%
dplyr::select("long", "lat", "poc_ave14")
all_poc <- base::merge(poc_2012, poc_2014, by = c("long", "lat"), all = TRUE) %>%
pivot_longer(poc_ave12:poc_ave14,
names_to = "year",
names_prefix = "poc_ave",
values_to = "poc_ave")
all_poc_yield <- base::merge (all_poc, combined_long, by = c("lat", "long", "year"), all = TRUE) %>%
dplyr::select("site", "long", "lat", "year", "poc_ave", "yield")
ggplot(data = all_poc_yield, aes(x = poc_ave, y = yield)) +
geom_point(aes(color = year)) +
theme_minimal()
## poc only ##
ggplot(data = all_poc_yield, aes(x = year, y = poc_ave, fill = year)) +
geom_violin() +
#geom_jitter(color="black", size=0.4, alpha=0.9) +
theme(
legend.position="none",
plot.title = element_text(size=11)
) +
theme_minimal()
#ggplot_build(p)$data
POC versus yield for 2012 and 2014 models
all_12 <- all_poc_yield %>%
filter(year == "12")
ggplot(data = all_12, aes(x = poc_ave, y = yield)) +
geom_point(color = "#F8766D") +
xlim(c(77,2000)) +
ylim(c(2500000, 11580000)) +
theme_minimal()
all_14 <- all_poc_yield %>%
filter(year == "14")
ggplot(data = all_14, aes(x = poc_ave, y = yield)) +
geom_point(color = "#00BFC4") +
xlim(c(77,2000)) +
ylim(c(2500000, 11580000)) +
theme_minimal()
all_poc_sst_yield <- base::merge (all_poc_yield, all_sst_yield, by = c("site","lat", "long", "year", "yield"), all = TRUE)
ggplot(data = all_poc_sst_yield, aes(x = sst_ave, y = poc_ave)) +
geom_point(aes(color = year)) +
geom_smooth(method = lm, color = "black", aes(linetype = year), se = FALSE) +
theme_minimal()